Analysis date: 2021-03-30

Setup

Load libraries

library(plyr)
library(gtools)
library(pheatmap)
library(Matrix)
library(Hmisc)
library(ggpubr)
library(readxl)
library(DESeq2)
library(tidyverse)
library(limma)
library(apeglm)
library(biomartr)
library(biomaRt)
library(MultiAssayExperiment)
library(gplots)
library(grid)
library(cowplot)

Load data

load("data/multiomics_MAE.RData")
source("data/Figure_layouts.R")

KEGG <- read.table("data/c2.cp.kegg.v7.0.symbols.gmt", sep = "/")
KEGG <- KEGG %>% as_tibble() %>% mutate(pathway = gsub("\thttp:", "", V1),
                                        genes=V7)
BCAA_genes <- KEGG %>% filter(pathway == "KEGG_VALINE_LEUCINE_AND_ISOLEUCINE_DEGRADATION") %>% dplyr::select(genes) %>% unlist() %>% strsplit2("\t") %>% .[,-1]
proteasome_genes <- KEGG %>% filter(pathway == "KEGG_PROTEASOME") %>% dplyr::select(genes) %>% unlist() %>% strsplit2("\t") %>% .[,-1]
splice_genes <- KEGG %>% filter(pathway == "KEGG_SPLICEOSOME") %>% dplyr::select(genes) %>% unlist() %>% strsplit2("\t") %>% .[,-1]
BCR_genes <- KEGG %>% filter(pathway == "KEGG_B_CELL_RECEPTOR_SIGNALING_PATHWAY") %>% dplyr::select(genes) %>% unlist() %>% strsplit2("\t") %>% .[,-1]

Functions

Rename deletions and gains functions

change_chr_abber_brackets <- function(levels_alt){
levels_alt %>% 
  str_replace(., "del", "del(") %>% str_replace(., "gain", "gain(") %>%
  str_replace(., "q", ")(q") %>% str_replace(., "p", ")(p") %>% 
  str_replace(., "\\)\\(prim", "prim") %>% 
  str_replace(., "11\\)\\(q", "11)(q22.3)") %>% 
  str_replace(., "p13", "p13)") %>% str_replace(., "q14", "q14)") %>%
  str_replace(., "q24", "q24)")
}

Format data

Proteomics

prot_few_nas <- multiomics_MAE[["proteomics"]] %>% is.na() %>% rowSums()
prot_few_nas <- prot_few_nas[ prot_few_nas == 0 ] %>% names()

Biomart

Get annotation from biomart
#ensembl=useMart("ensembl")
#ensembl = useDataset("hsapiens_gene_ensembl",mart=ensembl)
#genemap <- getBM(attributes = c("ensembl_gene_id", "hgnc_symbol", "start_position", "end_position" , "chromosome_name", "description"), 
#    filters = "hgnc_symbol", values = (prot_few_nas %>% unique), mart = ensembl)
#genemap <- genemap %>% as_tibble() %>% mutate(mean_position=(start_position + end_position)/2)
load("/Volumes/sd17b003/Sophie/Analysis/CLL_Proteomics/R_objects/ensembl_proteins_location.RData")
Add gene location to MAE
metadata(multiomics_MAE)$gene_symbol_mapping <- left_join(metadata(multiomics_MAE)$gene_symbol_mapping, genemap)
Create proteomics_tbl_meta_biomart tbl
proteomics_tbl_meta_biomart_chrab <- wideFormat(multiomics_MAE[,,"chrom_abber"]) %>% as_tibble()
proteomics_tbl_meta_biomart_chrab <- mutate_if(proteomics_tbl_meta_biomart_chrab, is.numeric, as.logical)
proteomics_tbl_meta_biomart_chrab <- proteomics_tbl_meta_biomart_chrab %>%  dplyr::select(primary, 
                colnames(proteomics_tbl_meta_biomart_chrab[, -1])[
                  apply(proteomics_tbl_meta_biomart_chrab[, -1] ,2, function(cl){ all(table(cl)>2) } )
                  ] )
proteomics_tbl_meta_biomart_chrab <- proteomics_tbl_meta_biomart_chrab %>%  dplyr::select(primary, 
                colnames(proteomics_tbl_meta_biomart_chrab[, -1])[
                  apply(proteomics_tbl_meta_biomart_chrab[, -1] ,2, function(cl){ length(table(cl))>1 } )
                  ] )

proteomics_tbl_meta_biomart_SNP <- wideFormat(multiomics_MAE[,,"SNPs"]) %>% as_tibble()
proteomics_tbl_meta_biomart_SNP <- mutate_if(proteomics_tbl_meta_biomart_SNP, is.numeric, as.logical)
proteomics_tbl_meta_biomart_SNP <- proteomics_tbl_meta_biomart_SNP %>%  dplyr::select(primary, 
                colnames(proteomics_tbl_meta_biomart_SNP[, -1])[
                  apply(proteomics_tbl_meta_biomart_SNP[, -1] ,2, function(cl){ all(table(cl)>2) } )
                  ] )
proteomics_tbl_meta_biomart_SNP <- proteomics_tbl_meta_biomart_SNP %>%  dplyr::select(primary, 
                colnames(proteomics_tbl_meta_biomart_SNP[, -1])[
                  apply(proteomics_tbl_meta_biomart_SNP[, -1] ,2, function(cl){ length(table(cl))>1 } )
                  ] )

proteomics_tbl_meta_biomart_health <- wideFormat(multiomics_MAE[,,"health_record_bin"]) %>% as_tibble() %>% dplyr::select(primary, health_record_bin_IGHV_mutated, health_record_bin_elderly_at_diagnosis, health_record_bin_treated)
proteomics_tbl_meta_biomart_health <- mutate_if(proteomics_tbl_meta_biomart_health, is.numeric, as.logical)

proteomics_tbl_meta_biomart <- left_join(
  left_join((
    longFormat(multiomics_MAE[,,"proteomics"], colDataCols = c("trisomy12", "IGHV_mutated") ) %>% 
               as_tibble() %>%
               #mutate(IGHV= if_else(IGHV %in% c("M", "U"), IGHV, "NA") )
               mutate(IGHV=IGHV_mutated)
             ), 
                                         metadata(multiomics_MAE)$gene_symbol_mapping[c(2,3,5:7)], 
                                         by=c("rowname"="hgnc_symbol")),
  proteomics_tbl_meta_biomart_chrab, by=c("primary"))
proteomics_tbl_meta_biomart <- left_join( left_join(proteomics_tbl_meta_biomart,
          proteomics_tbl_meta_biomart_SNP, by=c("primary")),
          proteomics_tbl_meta_biomart_health, by=c("primary"))
Create Drug and Proteomics objects
drug_and_proteomics_prot <- longFormat(multiomics_MAE[,,c("proteomics")]) %>% as_tibble()
drug_and_proteomics_drug <- longFormat(multiomics_MAE[,,c("drug_resp_mono")]) %>% as_tibble()
drug_and_proteomics_drug <- drug_and_proteomics_drug %>% separate(rowname, into = c("Drug", "conc"), sep = "_") %>% 
  group_by(assay, primary, rowname=Drug, colname ) %>%
  dplyr::summarise(value=mean(value, na.rm=TRUE)) %>% ungroup()
drug_and_proteomics <- bind_rows(drug_and_proteomics_prot, drug_and_proteomics_drug)

pats_drug_and_prot <- drug_and_proteomics %>% group_by(primary) %>% dplyr::summarise(nassay=length(unique(assay))) %>% dplyr::filter(nassay==2) %>% .$primary
drug_and_proteomics <- drug_and_proteomics %>% dplyr::filter(primary %in% pats_drug_and_prot)
all_prot <- drug_and_proteomics %>% dplyr::filter(assay=="proteomics", rowname %in% prot_few_nas) %>% .$rowname %>% unique()

all_drugs <- drug_and_proteomics %>% dplyr::filter(assay=="drug_resp_mono") %>% .$rowname %>% unique()

Overlapping patients proteomics and RNASeq

pat_overlap_prot_RNA <- colnames(intersectColumns(multiomics_MAE[,,c("proteomics", "RNAseq_norm")]))[["proteomics"]]

This is what the object looks like

proteomics_tbl_meta_biomart
experiments(multiomics_MAE)
## ExperimentList class object of length 8:
##  [1] SNPs: matrix with 8 rows and 80 columns
##  [2] drug_resp_mono: data.frame with 130 rows and 81 columns
##  [3] chrom_abber: data.frame with 5 rows and 81 columns
##  [4] health_record_bin: matrix with 6 rows and 81 columns
##  [5] health_record_scaled: matrix with 6 rows and 81 columns
##  [6] proteomics: matrix with 7311 rows and 68 columns
##  [7] RNAseq_full: DESeqDataSet with 63214 rows and 59 columns
##  [8] RNAseq_norm: data.frame with 9273 rows and 59 columns
as_tibble(colData(multiomics_MAE))
multiomics_MAE
## A MultiAssayExperiment object of 8 listed
##  experiments with user-defined names and respective classes.
##  Containing an ExperimentList class object of length 8:
##  [1] SNPs: matrix with 8 rows and 80 columns
##  [2] drug_resp_mono: data.frame with 130 rows and 81 columns
##  [3] chrom_abber: data.frame with 5 rows and 81 columns
##  [4] health_record_bin: matrix with 6 rows and 81 columns
##  [5] health_record_scaled: matrix with 6 rows and 81 columns
##  [6] proteomics: matrix with 7311 rows and 68 columns
##  [7] RNAseq_full: DESeqDataSet with 63214 rows and 59 columns
##  [8] RNAseq_norm: data.frame with 9273 rows and 59 columns
## Features:
##  experiments() - obtain the ExperimentList instance
##  colData() - the primary/phenotype DFrame
##  sampleMap() - the sample availability DFrame
##  `$`, `[`, `[[` - extract colData columns, subset, or experiment
##  *Format() - convert into a long or wide DFrame
##  assays() - convert ExperimentList to a SimpleList of matrices
assay(multiomics_MAE[prot_few_nas , ,"proteomics"])[1:5,1:5]
## harmonizing input:
##   removing 522 sampleMap rows not in names(experiments)
##   removing 13 colData rownames not in sampleMap 'primary'
##             CLL_1        CLL_4       CLL_8      CLL_20      CLL_24
## A2M   -0.23106599 -0.123775317 -0.20324611 -0.11422224  0.05369119
## AAAS  -0.04446032 -0.003929938 -0.10881390  0.21728422 -0.24019198
## AACS   0.18091637 -0.469318906  0.01926965  0.46120353  0.27485862
## AAGAB  0.11577450  0.129642752  0.10785538 -0.06247763  0.25647801
## AAMDC  0.21443028 -0.003820597 -0.09936349  0.62120261 -0.26938714
dim(assay(multiomics_MAE[prot_few_nas , ,"proteomics"]))
## harmonizing input:
##   removing 522 sampleMap rows not in names(experiments)
##   removing 13 colData rownames not in sampleMap 'primary'
## [1] 7311   68

Some numbers for the different datasets

print(paste( round( (!(assay(multiomics_MAE[,,"proteomics"]) %>% is.na() ) ) %>% colSums() %>% mean) , 
             "proteins per sample were detected on average in proteomics data"))
## [1] "7311 proteins per sample were detected on average in proteomics data"
print(paste( round( (!(assay(multiomics_MAE[,,"RNAseq_full"]) %>% is.na() ) ) %>% 
                      colSums() %>% mean) , 
             "transcipts per sample were detected on average in RNASeq data"))
## [1] "63214 transcipts per sample were detected on average in RNASeq data"
if(!any(is.na(assay(multiomics_MAE[,,"RNAseq_full"])))){
  print("No NAs in RNASeq dataset")
}
## [1] "No NAs in RNASeq dataset"
print( paste( ( (assay(multiomics_MAE[,,"RNAseq_full"]) %>% rowMeans() ) > 0 ) %>% sum , "different transcripts were detected"))
## [1] "40083 different transcripts were detected"
print(
  paste(
    multiomics_MAE@metadata$gene_symbol_mapping %>% 
  filter(ensembl_gene_id %in%
           names((assay(multiomics_MAE[,,"RNAseq_full"]) %>% rowMeans() ) > 0 ) ) %>%
  dplyr::select(hgnc_symbol) %>% unique %>%
  filter(hgnc_symbol != "") %>% 
  nrow, 
  "transcripts with unique hgnc symbols"))
## [1] "35402 transcripts with unique hgnc symbols"
print( paste(
(multiomics_MAE@metadata$gene_symbol_mapping %>% 
  filter(ensembl_gene_id %in%
           names((assay(multiomics_MAE[,,"RNAseq_full"]) %>% rowMeans() ) > 0 ) ) %>%
  dplyr::select(hgnc_symbol) %>% unique %>%
  filter(hgnc_symbol != "") %>% .$hgnc_symbol %in%
rownames(multiomics_MAE@ExperimentList$proteomics) ) %>% sum,
"matching proteins and transcripts detected"))
## [1] "7199 matching proteins and transcripts detected"
protpats <- multiomics_MAE@sampleMap %>% as_tibble() %>% filter(assay=="proteomics") %>% .$primary %>% unique
message("Available datasets for proteomics patients:")
multiomics_MAE[,protpats,]@sampleMap %>% .$assay %>% table
## .
##                 SNPs       drug_resp_mono          chrom_abber 
##                   67                   68                   68 
##    health_record_bin health_record_scaled           proteomics 
##                   68                   68                   68 
##          RNAseq_full          RNAseq_norm 
##                   59                   59

Oncoplot

order_oncoplot <- longFormat(multiomics_MAE[,,c("SNPs","chrom_abber")]) %>% 
  as_tibble() %>% 
  group_by(rowname) %>%
  summarise(total=sum(value, na.rm = TRUE)) %>%
  arrange(total ) %>%
  .$rowname %>% change_chr_abber_brackets

tmp <- wideFormat(multiomics_MAE[,,c("SNPs","chrom_abber")]) %>% as_tibble() 
colnames(tmp) <- colnames(tmp) %>% gsub("SNPs_|chrom_abber_", "",.) %>% 
  change_chr_abber_brackets
for(l in 1:length(order_oncoplot)){
  tmp <- tmp %>% arrange_( as.name(order_oncoplot[l] ) )
}

onco_center <- 
  longFormat(multiomics_MAE[,,c("SNPs","chrom_abber")]) %>% 
  as_tibble() %>% 
  mutate(alteration= if_else(value==1, assay, as.character(value))) %>%
  mutate(rowname = change_chr_abber_brackets(rowname) ) %>%
  ggplot(aes(primary, rowname, fill=alteration)) + 
  geom_tile(color="black") + 
  scale_fill_manual(values=c("white", "orange1", "#ca0020", "grey"), labels=c("wt", "CNV", "SNV", "NA" ), na.translate=FALSE) + 
  scale_y_discrete(limits=order_oncoplot) +
  scale_x_discrete(limits=rev(tmp$primary)) +
  xlab("Patients") +
  theme(panel.background = element_rect(fill= "darkgrey"), panel.grid = element_blank(), 
        axis.text.x = element_blank(), 
        #axis.title = element_blank(),
        axis.title.y = element_blank(),
        axis.ticks.x = element_blank(), 
        legend.position = "bottom", legend.key.size = unit(10, "pt")) +
  guides(fill=guide_legend(title = NULL ))

onco_right <-longFormat(multiomics_MAE[,,c("SNPs","chrom_abber")]) %>% 
  as_tibble() %>%
  mutate(rowname = change_chr_abber_brackets(rowname) ) %>%
  ggplot(aes(rowname, value)) + geom_col(aes(fill=assay)) +
  scale_x_discrete(limits=order_oncoplot) +
  coord_flip()+
  scale_fill_manual(values=c("orange1", "#ca0020")) +
  theme(panel.background = element_rect(fill= "white"), panel.grid = element_blank(), axis.title = element_blank(),
        axis.line.x = element_line(color="black")) 

onco_right_perc <- longFormat(multiomics_MAE[,,c("SNPs","chrom_abber")]) %>% as_tibble()  %>% 
  group_by(rowname) %>% 
  dplyr::summarise(perc=round(sum(value, na.rm = TRUE)/ sum(!is.na(value)) , 2) ) %>% 
  mutate(rowname = change_chr_abber_brackets(rowname) ) %>%
  ggplot(aes(rowname, 1, label=paste0(perc*100, "%") )) + 
  geom_text() + coord_flip() + theme_void() + 
  scale_x_discrete(limits=order_oncoplot)

onco_right_total <- 
  longFormat(multiomics_MAE[,,c("SNPs","chrom_abber")]) %>% as_tibble()  %>% 
  group_by(rowname) %>% 
  dplyr::summarise(total=sum(value, na.rm = TRUE )) %>% 
  mutate(rowname = change_chr_abber_brackets(rowname) ) %>%
  ggplot(aes(rowname, 1, label=total)) + 
  geom_text() + coord_flip() + theme_void() + 
  scale_x_discrete(limits=order_oncoplot)

onco_top <-longFormat(multiomics_MAE[,,c("SNPs","chrom_abber")]) %>% 
  as_tibble() %>%
  mutate(rowname = change_chr_abber_brackets(rowname) ) %>%
  ggplot(aes(primary, value)) + geom_col(aes(fill=assay)) +
  scale_x_discrete(limits=rev(tmp$primary)) +
  scale_fill_manual(values=c("orange1", "#ca0020")) +
  theme(panel.background = element_rect(fill= "white"), panel.grid = element_blank(), axis.title = element_blank(),
        axis.line.x = element_line(color="black"))

p1 <- insert_yaxis_grob(onco_center,get_panel(onco_right_total) , grid::unit(.2, "null"), position = "right")
p1.2<- insert_yaxis_grob(p1, get_panel(onco_right), grid::unit(.1, "null"), position = "right")
p1.3<- insert_yaxis_grob(p1.2, get_panel(onco_right_perc), grid::unit(.2, "null"), position = "right")
oncoplot <- insert_xaxis_grob(p1.3, onco_top, grid::unit(.2, "null"), position = "top")
ggdraw(oncoplot)

Drug classes

drs_sel <- metadata(multiomics_MAE)$drugs_functional_groups %>% 
  filter( !( grepl("DMSO", Drug)   ) )  %>% 
  mutate(Pathways=if_else(Pathways=="Apoptosis", "Inducer/inhibitor of apoptosis", Pathways))
order_drugs <- drs_sel$Pathways %>% table() %>% as_tibble() %>% arrange(desc(n)) %>% .$.
drug_nrs <- drs_sel %>%
  ggplot(aes(Pathways)) + geom_bar() + 
  pp_sra_noguides_tilted +
  scale_x_discrete(limits=order_drugs)

drug_nrs

Save the workspace for further scripts and important plots

save(all_drugs,
     BCAA_genes, proteasome_genes, splice_genes, BCR_genes,
     drug_and_proteomics, drug_and_proteomics_drug,
     multiomics_MAE,
     pat_overlap_prot_RNA,
     prot_few_nas,
     proteomics_tbl_meta_biomart, 
     proteomics_tbl_meta_biomart_chrab, 
     proteomics_tbl_meta_biomart_health, 
     proteomics_tbl_meta_biomart_SNP,
      file="data/CLL_Proteomics_Setup.RData")

save(oncoplot,  
file = "RData_plots/CLL_Proteomics_Setup_Plots.RData")

Session Info

sessionInfo()
## R version 4.0.2 (2020-06-22)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.7
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
##  [1] grid      parallel  stats4    stats     graphics  grDevices utils    
##  [8] datasets  methods   base     
## 
## other attached packages:
##  [1] cowplot_1.1.1               gplots_3.1.1               
##  [3] MultiAssayExperiment_1.14.0 biomaRt_2.44.4             
##  [5] biomartr_0.9.2              apeglm_1.10.0              
##  [7] limma_3.44.3                forcats_0.5.1              
##  [9] stringr_1.4.0               dplyr_1.0.3                
## [11] purrr_0.3.4                 readr_1.4.0                
## [13] tidyr_1.1.2                 tibble_3.0.6               
## [15] tidyverse_1.3.0             DESeq2_1.28.1              
## [17] SummarizedExperiment_1.18.2 DelayedArray_0.14.1        
## [19] matrixStats_0.58.0          Biobase_2.48.0             
## [21] GenomicRanges_1.40.0        GenomeInfoDb_1.24.2        
## [23] IRanges_2.22.2              S4Vectors_0.26.1           
## [25] BiocGenerics_0.34.0         readxl_1.3.1               
## [27] ggpubr_0.4.0                Hmisc_4.4-2                
## [29] ggplot2_3.3.3               Formula_1.2-4              
## [31] survival_3.2-7              lattice_0.20-41            
## [33] Matrix_1.3-2                pheatmap_1.0.12            
## [35] gtools_3.8.2                plyr_1.8.6                 
## 
## loaded via a namespace (and not attached):
##   [1] backports_1.2.1        BiocFileCache_1.12.1   splines_4.0.2         
##   [4] BiocParallel_1.22.0    digest_0.6.27          htmltools_0.5.1.1     
##   [7] magrittr_2.0.1         checkmate_2.0.0        memoise_2.0.0         
##  [10] cluster_2.1.0          openxlsx_4.2.3         Biostrings_2.56.0     
##  [13] annotate_1.66.0        modelr_0.1.8           askpass_1.1           
##  [16] bdsmatrix_1.3-4        prettyunits_1.1.1      jpeg_0.1-8.1          
##  [19] colorspace_2.0-0       rappdirs_0.3.3         blob_1.2.1            
##  [22] rvest_0.3.6            haven_2.3.1            xfun_0.20             
##  [25] crayon_1.4.0           RCurl_1.98-1.2         jsonlite_1.7.2        
##  [28] genefilter_1.70.0      glue_1.4.2             gtable_0.3.0          
##  [31] zlibbioc_1.34.0        XVector_0.28.0         car_3.0-10            
##  [34] abind_1.4-5            scales_1.1.1           mvtnorm_1.1-1         
##  [37] DBI_1.1.1              rstatix_0.6.0          Rcpp_1.0.6            
##  [40] xtable_1.8-4           progress_1.2.2         emdbook_1.3.12        
##  [43] htmlTable_2.1.0        foreign_0.8-81         bit_4.0.4             
##  [46] htmlwidgets_1.5.3      httr_1.4.2             RColorBrewer_1.1-2    
##  [49] ellipsis_0.3.1         farver_2.0.3           pkgconfig_2.0.3       
##  [52] XML_3.99-0.5           nnet_7.3-15            dbplyr_2.0.0          
##  [55] locfit_1.5-9.4         labeling_0.4.2         tidyselect_1.1.0      
##  [58] rlang_0.4.10           AnnotationDbi_1.50.3   munsell_0.5.0         
##  [61] cellranger_1.1.0       tools_4.0.2            cachem_1.0.1          
##  [64] cli_2.3.0              generics_0.1.0         RSQLite_2.2.3         
##  [67] broom_0.7.4            evaluate_0.14          fastmap_1.1.0         
##  [70] yaml_2.2.1             knitr_1.31             bit64_4.0.5           
##  [73] fs_1.5.0               zip_2.1.1              caTools_1.18.1        
##  [76] xml2_1.3.2             compiler_4.0.2         rstudioapi_0.13       
##  [79] curl_4.3               png_0.1-7              ggsignif_0.6.0        
##  [82] reprex_1.0.0           geneplotter_1.66.0     stringi_1.5.3         
##  [85] highr_0.8              vctrs_0.3.6            pillar_1.4.7          
##  [88] lifecycle_0.2.0        data.table_1.13.6      bitops_1.0-6          
##  [91] R6_2.5.0               latticeExtra_0.6-29    KernSmooth_2.23-18    
##  [94] gridExtra_2.3          rio_0.5.16             MASS_7.3-53           
##  [97] assertthat_0.2.1       openssl_1.4.3          withr_2.4.1           
## [100] GenomeInfoDbData_1.2.3 hms_1.0.0              rpart_4.1-15          
## [103] coda_0.19-4            rmarkdown_2.6          carData_3.0-4         
## [106] bbmle_1.0.23.1         numDeriv_2016.8-1.1    lubridate_1.7.9.2     
## [109] base64enc_0.1-3